{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"3\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import youtokentome as yttm\n",
    "import json\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.naive_bayes import ComplementNB\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the pickled `bow` object (presumably a fitted bag-of-words vectorizer, given the\n",
    "# `.transform` calls below). NOTE: unpickling executes arbitrary code; only load trusted files.\n",
    "with open('bow-language-detection.pkl', 'rb') as bow_file:\n",
    "    bow = pickle.load(bow_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<1x400000 sparse matrix of type '<class 'numpy.int64'>'\n",
       "\twith 4 stored elements in Compressed Sparse Row format>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Smoke test: vectorize one space-joined subword string and display the resulting sparse row.\n",
    "sample_subwords = '▁dengan ▁stim ▁dan ▁pengeluaran'\n",
    "v = bow.transform([sample_subwords])\n",
    "v"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['train_X', 'test_X', 'train_Y', 'test_Y'])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the pre-split dataset; its keys are displayed as the cell output.\n",
    "with open('train-test.json') as dataset_file:\n",
    "    train_test = json.load(dataset_file)\n",
    "\n",
    "train_test.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn the string training labels into integer class ids.\n",
    "train_label_encoder = LabelEncoder()\n",
    "train_Y = train_label_encoder.fit_transform(train_test['train_Y'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Encode the test labels with the mapping fitted on the *training* labels. Fitting a second,\n",
    "# independent encoder on the test split would silently assign different ids whenever the two\n",
    "# splits do not contain exactly the same set of classes; with this form an unseen test label\n",
    "# raises instead of being mis-numbered.\n",
    "test_Y = LabelEncoder().fit(train_test['train_Y']).transform(train_test['test_Y'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the trained YouTokenToMe BPE model from disk.\n",
    "bpe_model_path = 'language-detection.model'\n",
    "bpe = yttm.BPE(model = bpe_model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'other'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display one raw (un-encoded) training label.\n",
    "raw_train_labels = train_test['train_Y']\n",
    "raw_train_labels[8]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18918596"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# BPE-encode every training text into subword strings, then join each text's subwords with\n",
    "# single spaces so `bow.transform` receives one string per text.\n",
    "subs = list(map(' '.join, bpe.encode(train_test['train_X'], output_type = yttm.OutputType.SUBWORD)))\n",
    "len(subs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4729650"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same subword segmentation and space-joining as for the training texts, applied to the test texts.\n",
    "test_subs = list(map(' '.join, bpe.encode(train_test['test_X'], output_type = yttm.OutputType.SUBWORD)))\n",
    "len(test_subs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Vectorize the space-joined training subword strings with the loaded `bow` object.\n",
    "train_X = bow.transform(subs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Vectorize the space-joined test subword strings with the same `bow` object.\n",
    "test_X = bow.transform(test_subs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_sparse_matrix_to_sparse_tensor(X):\n",
    "    \"\"\"Convert a sparse matrix into the two sparse feeds used by `Model`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X : sparse matrix exposing `.tocoo()` (e.g. a SciPy CSR matrix)\n",
    "        Presumably one row per example and one column per vocabulary entry - confirm with caller.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple of (tf.SparseTensorValue, tf.SparseTensorValue)\n",
    "        Both share the same `[row, col]` indices and dense shape. The first carries the\n",
    "        column index of every stored entry (fed to `Model.X`), the second carries the\n",
    "        stored value of every entry (fed to `Model.W`).\n",
    "    \"\"\"\n",
    "    coo = X.tocoo()\n",
    "    # (nnz, 2) ndarray of [row, col] pairs. `np.stack` is used instead of `np.mat(...)`:\n",
    "    # `np.mat` builds a discouraged `np.matrix` and the alias was removed in NumPy 2.0.\n",
    "    indices = np.stack((coo.row, coo.col), axis = 1)\n",
    "    ids = tf.SparseTensorValue(indices, coo.col, coo.shape)\n",
    "    weights = tf.SparseTensorValue(indices, coo.data, coo.shape)\n",
    "    return ids, weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Model:\n",
    "    \"\"\"Mean-pooled sparse-embedding classifier built with the TF1 graph API.\n",
    "\n",
    "    Each example is a sparse set of embedding ids (`X`) with per-id weights (`W`). The\n",
    "    embeddings are combined with `combiner='mean'` and passed through a single dense layer\n",
    "    to produce class logits.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    learning_rate : float\n",
    "        Learning rate handed to the Adam optimizer.\n",
    "    dimension : int, default 32\n",
    "        Width of each embedding vector.\n",
    "    output : int, default 6\n",
    "        Number of units in the logits layer (number of classes).\n",
    "    vocab_size : int, optional\n",
    "        Number of rows in the embedding table. When omitted it falls back to\n",
    "        `train_X.shape[1]` from the notebook namespace, which is the original behaviour.\n",
    "\n",
    "    Attributes\n",
    "    ----------\n",
    "    X, W : sparse int32 placeholders for the ids and their weights.\n",
    "    Y : int32 placeholder of shape [None] holding the class id of every example.\n",
    "    embed, logits, cost, optimizer, accuracy : graph tensors / ops built from the above.\n",
    "    \"\"\"\n",
    "    def __init__(self, learning_rate, dimension = 32, output = 6, vocab_size = None):\n",
    "        if vocab_size is None:\n",
    "            # Backward-compatible default: size the table from the notebook-level `train_X`.\n",
    "            vocab_size = train_X.shape[1]\n",
    "\n",
    "        self.X = tf.sparse_placeholder(tf.int32)\n",
    "        self.W = tf.sparse_placeholder(tf.int32)\n",
    "        self.Y = tf.placeholder(tf.int32, [None])\n",
    "\n",
    "        embeddings = tf.Variable(tf.truncated_normal([vocab_size, dimension]))\n",
    "        # One pooled vector per row of the sparse input.\n",
    "        self.embed = tf.nn.embedding_lookup_sparse(embeddings, self.X, self.W, combiner='mean')\n",
    "        self.logits = tf.layers.dense(self.embed, output)\n",
    "\n",
    "        per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(\n",
    "            logits = self.logits, labels = self.Y)\n",
    "        self.cost = tf.reduce_mean(per_example_loss)\n",
    "        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)\n",
    "\n",
    "        predicted = tf.argmax(self.logits, 1, output_type=tf.int32)\n",
    "        correct_pred = tf.equal(predicted, self.Y)\n",
    "        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/embedding_ops.py:515: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Deprecated in favor of operator or tf.math.divide.\n",
      "WARNING:tensorflow:From <ipython-input-16-f18d73326ce7>:9: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use keras.layers.Dense instead.\n",
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/tensorflow_core/python/layers/core.py:187: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `layer.__call__` method instead.\n"
     ]
    }
   ],
   "source": [
    "# Start from an empty default graph so re-running this cell does not pile up duplicate ops.\n",
    "tf.reset_default_graph()\n",
    "sess = tf.InteractiveSession()\n",
    "\n",
    "learning_rate = 1e-3\n",
    "model = Model(learning_rate)\n",
    "\n",
    "init_op = tf.global_variables_initializer()\n",
    "sess.run(init_op)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 256\n",
    "epoch = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'lang-detection-w/model.ckpt'"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Checkpoint only the trainable variables, and write a first checkpoint of the freshly\n",
    "# initialised weights. The returned checkpoint path is displayed as the cell output.\n",
    "checkpoint_vars = tf.trainable_variables()\n",
    "saver = tf.train.Saver(var_list = checkpoint_vars)\n",
    "saver.save(sess, 'lang-detection-w/model.ckpt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 73901/73901 [07:16<00:00, 169.40it/s, accuracy=0.974, cost=0.123] \n",
      "test minibatch loop: 100%|██████████| 18476/18476 [01:10<00:00, 260.37it/s, accuracy=0.98, cost=0.043]  \n",
      "train minibatch loop:   0%|          | 18/73901 [00:00<07:14, 170.19it/s, accuracy=0.977, cost=0.0879]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 507.2389669418335\n",
      "epoch: 0, training loss: 0.168605, training acc: 0.949238, valid loss: 0.103548, valid acc: 0.972824\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 73901/73901 [07:16<00:00, 169.34it/s, accuracy=0.98, cost=0.101]  \n",
      "test minibatch loop: 100%|██████████| 18476/18476 [01:11<00:00, 259.92it/s, accuracy=1, cost=0.0307]    \n",
      "train minibatch loop:   0%|          | 18/73901 [00:00<07:07, 172.92it/s, accuracy=0.969, cost=0.103] "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 507.52713799476624\n",
      "epoch: 1, training loss: 0.093084, training acc: 0.975911, valid loss: 0.094879, valid acc: 0.975816\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 73901/73901 [07:15<00:00, 169.68it/s, accuracy=0.98, cost=0.0923] \n",
      "test minibatch loop: 100%|██████████| 18476/18476 [01:11<00:00, 259.97it/s, accuracy=1, cost=0.0261]    \n",
      "train minibatch loop:   0%|          | 18/73901 [00:00<07:02, 174.90it/s, accuracy=0.973, cost=0.0773]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 506.637300491333\n",
      "epoch: 2, training loss: 0.086510, training acc: 0.977947, valid loss: 0.092312, valid acc: 0.976671\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 73901/73901 [07:16<00:00, 169.16it/s, accuracy=0.98, cost=0.0861] \n",
      "test minibatch loop: 100%|██████████| 18476/18476 [01:10<00:00, 260.24it/s, accuracy=1, cost=0.0229]    \n",
      "train minibatch loop:   0%|          | 18/73901 [00:00<06:58, 176.73it/s, accuracy=0.969, cost=0.105] "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 507.90590500831604\n",
      "epoch: 3, training loss: 0.083693, training acc: 0.978807, valid loss: 0.091118, valid acc: 0.977115\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 73901/73901 [07:14<00:00, 169.92it/s, accuracy=0.98, cost=0.0815] \n",
      "test minibatch loop: 100%|██████████| 18476/18476 [01:11<00:00, 259.59it/s, accuracy=1, cost=0.0207]    \n",
      "train minibatch loop:   0%|          | 18/73901 [00:00<07:11, 171.40it/s, accuracy=0.969, cost=0.105] "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 506.1323070526123\n",
      "epoch: 4, training loss: 0.082089, training acc: 0.979305, valid loss: 0.090457, valid acc: 0.977387\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 73901/73901 [07:15<00:00, 169.50it/s, accuracy=0.98, cost=0.078]  \n",
      "test minibatch loop: 100%|██████████| 18476/18476 [01:10<00:00, 260.34it/s, accuracy=1, cost=0.0194]    \n",
      "train minibatch loop:   0%|          | 18/73901 [00:00<07:11, 171.23it/s, accuracy=0.98, cost=0.0619] "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 506.99291372299194\n",
      "epoch: 5, training loss: 0.081031, training acc: 0.979639, valid loss: 0.090023, valid acc: 0.977549\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 73901/73901 [07:16<00:00, 169.14it/s, accuracy=0.98, cost=0.0742] \n",
      "test minibatch loop: 100%|██████████| 18476/18476 [01:11<00:00, 259.65it/s, accuracy=1, cost=0.0188]    \n",
      "train minibatch loop:   0%|          | 18/73901 [00:00<07:07, 172.77it/s, accuracy=0.973, cost=0.103] "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 508.1200499534607\n",
      "epoch: 6, training loss: 0.080218, training acc: 0.979908, valid loss: 0.089695, valid acc: 0.977713\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 73901/73901 [07:14<00:00, 170.19it/s, accuracy=0.98, cost=0.0709] \n",
      "test minibatch loop: 100%|██████████| 18476/18476 [01:11<00:00, 259.44it/s, accuracy=1, cost=0.0187]    \n",
      "train minibatch loop:   0%|          | 18/73901 [00:00<07:08, 172.29it/s, accuracy=0.977, cost=0.102] "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 505.47494888305664\n",
      "epoch: 7, training loss: 0.079552, training acc: 0.980120, valid loss: 0.089535, valid acc: 0.977784\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 73901/73901 [07:14<00:00, 170.15it/s, accuracy=0.98, cost=0.0685] \n",
      "test minibatch loop: 100%|██████████| 18476/18476 [01:10<00:00, 260.69it/s, accuracy=1, cost=0.0188]    \n",
      "train minibatch loop:   0%|          | 18/73901 [00:00<06:59, 175.99it/s, accuracy=0.977, cost=0.101] "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 505.2287175655365\n",
      "epoch: 8, training loss: 0.079030, training acc: 0.980288, valid loss: 0.089536, valid acc: 0.977805\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 73901/73901 [07:16<00:00, 169.30it/s, accuracy=0.98, cost=0.0668] \n",
      "test minibatch loop: 100%|██████████| 18476/18476 [01:11<00:00, 259.80it/s, accuracy=1, cost=0.0193]    \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 507.66829776763916\n",
      "epoch: 9, training loss: 0.078613, training acc: 0.980419, valid loss: 0.089660, valid acc: 0.977789\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "from tqdm import tqdm\n",
    "\n",
    "n_train, n_test = train_X.shape[0], test_X.shape[0]\n",
    "\n",
    "for e in range(epoch):\n",
    "    lasttime = time.time()\n",
    "    train_acc, train_loss, test_acc, test_loss = [], [], [], []\n",
    "\n",
    "    # Training pass: one optimizer step per mini-batch, in stored row order.\n",
    "    pbar = tqdm(range(0, n_train, batch_size), desc = 'train minibatch loop')\n",
    "    for i in pbar:\n",
    "        stop = min(i + batch_size, n_train)  # the final batch may be shorter\n",
    "        batch_x = convert_sparse_matrix_to_sparse_tensor(train_X[i:stop])\n",
    "        batch_y = train_Y[i:stop]\n",
    "        acc, cost, _ = sess.run(\n",
    "            [model.accuracy, model.cost, model.optimizer],\n",
    "            feed_dict = {\n",
    "                model.X: batch_x[0],\n",
    "                model.W: batch_x[1],\n",
    "                model.Y: batch_y\n",
    "            },\n",
    "        )\n",
    "        # Abort immediately if the loss diverges.\n",
    "        assert not np.isnan(cost)\n",
    "        train_loss.append(cost)\n",
    "        train_acc.append(acc)\n",
    "        pbar.set_postfix(cost = cost, accuracy = acc)\n",
    "\n",
    "    # Evaluation pass: same fetches minus the optimizer op, so no weights are updated.\n",
    "    # `batch_x` / `batch_y` intentionally keep their names: the next cell re-uses the\n",
    "    # last test batch left behind by this loop.\n",
    "    pbar = tqdm(range(0, n_test, batch_size), desc = 'test minibatch loop')\n",
    "    for i in pbar:\n",
    "        stop = min(i + batch_size, n_test)\n",
    "        batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i:stop])\n",
    "        batch_y = test_Y[i:stop]\n",
    "        acc, cost = sess.run(\n",
    "            [model.accuracy, model.cost],\n",
    "            feed_dict = {\n",
    "                model.X: batch_x[0],\n",
    "                model.W: batch_x[1],\n",
    "                model.Y: batch_y\n",
    "            },\n",
    "        )\n",
    "        test_loss.append(cost)\n",
    "        test_acc.append(acc)\n",
    "        pbar.set_postfix(cost = cost, accuracy = acc)\n",
    "\n",
    "    # Unweighted means over mini-batches (a shorter final batch counts like a full one).\n",
    "    train_loss = np.mean(train_loss)\n",
    "    train_acc = np.mean(train_acc)\n",
    "    test_loss = np.mean(test_loss)\n",
    "    test_acc = np.mean(test_acc)\n",
    "\n",
    "    print('time taken:', time.time() - lasttime)\n",
    "    print(\n",
    "        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\\n'\n",
    "        % (e, train_loss, train_acc, test_loss, test_acc)\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1.0,\n",
       " 0.019317048,\n",
       " array([[1.84335859e-05, 1.39014555e-09, 1.04053121e-04, 2.16701878e-07,\n",
       "         9.99714434e-01, 1.62753495e-04],\n",
       "        [9.76559639e-01, 5.64002767e-06, 6.04392244e-06, 2.34174430e-02,\n",
       "         5.90871352e-08, 1.12169728e-05],\n",
       "        [3.14420220e-07, 8.30683261e-02, 1.10712834e-01, 1.18992460e-09,\n",
       "         1.53369969e-03, 8.04684877e-01],\n",
       "        [1.15116722e-10, 1.63969863e-02, 9.56447959e-01, 4.57371727e-12,\n",
       "         1.53386327e-06, 2.71535385e-02],\n",
       "        [9.90425646e-01, 2.91815994e-08, 3.31482725e-13, 9.50208399e-03,\n",
       "         6.22980920e-08, 7.21805773e-05],\n",
       "        [3.67394226e-10, 1.41238458e-02, 9.78182018e-01, 1.79507581e-12,\n",
       "         1.75307184e-06, 7.69246696e-03],\n",
       "        [9.99271452e-01, 1.35744153e-06, 1.43437719e-04, 5.67995885e-04,\n",
       "         8.24067513e-07, 1.49628686e-05],\n",
       "        [9.84049559e-01, 1.24926373e-05, 1.76603135e-04, 1.55735407e-02,\n",
       "         1.02394843e-04, 8.55438993e-05],\n",
       "        [1.27655198e-09, 5.64305713e-09, 3.10502263e-10, 1.31050450e-08,\n",
       "         1.00000000e+00, 2.44416033e-11],\n",
       "        [2.50152264e-12, 4.70848499e-06, 9.99989867e-01, 1.42135974e-14,\n",
       "         7.03970215e-08, 5.43754186e-06],\n",
       "        [1.74444780e-04, 4.49014478e-04, 9.98713136e-01, 4.80127983e-06,\n",
       "         1.09056941e-06, 6.57500001e-04],\n",
       "        [9.09927422e-08, 9.91724312e-01, 7.81107135e-03, 9.12467379e-10,\n",
       "         8.98912276e-06, 4.55449335e-04],\n",
       "        [7.23725452e-06, 3.71089741e-03, 9.83361781e-01, 5.43822498e-09,\n",
       "         1.95646426e-05, 1.29005034e-02],\n",
       "        [4.66640631e-05, 6.78812739e-06, 8.50473298e-04, 5.39869461e-07,\n",
       "         9.98970389e-01, 1.25168764e-04],\n",
       "        [9.86348987e-01, 2.20826428e-06, 2.50932986e-09, 1.36486478e-02,\n",
       "         1.40673563e-07, 1.38694523e-08],\n",
       "        [3.29804948e-06, 5.02487179e-04, 9.86802876e-01, 8.46435455e-08,\n",
       "         1.35286117e-03, 1.13383122e-02],\n",
       "        [4.21806590e-09, 9.70884634e-04, 9.96301532e-01, 2.68642036e-11,\n",
       "         2.49236455e-05, 2.70258798e-03],\n",
       "        [1.24839417e-09, 2.15180360e-10, 1.44102771e-07, 1.07513573e-11,\n",
       "         9.99999881e-01, 1.38831435e-08],\n",
       "        [2.32152638e-06, 1.43690845e-02, 9.53602552e-01, 3.46152684e-09,\n",
       "         5.91792748e-04, 3.14342082e-02],\n",
       "        [2.06501678e-08, 3.45066888e-04, 9.96495664e-01, 6.44386278e-14,\n",
       "         1.45846138e-06, 3.15780123e-03],\n",
       "        [6.19314183e-11, 1.36719525e-11, 4.24014868e-09, 1.00901128e-10,\n",
       "         1.00000000e+00, 4.13333145e-09],\n",
       "        [2.00421520e-04, 6.94707909e-04, 9.95434105e-01, 2.87229095e-12,\n",
       "         1.17929027e-07, 3.67067382e-03],\n",
       "        [1.47433319e-08, 6.15158202e-11, 3.85271868e-08, 1.28624478e-09,\n",
       "         9.99999642e-01, 3.25890312e-07],\n",
       "        [9.99943376e-01, 1.58532293e-06, 2.90119146e-07, 4.47690436e-05,\n",
       "         9.06650985e-06, 9.30201566e-07],\n",
       "        [7.95085739e-08, 3.81874223e-03, 9.87072051e-01, 3.08724046e-10,\n",
       "         8.68149567e-04, 8.24106392e-03],\n",
       "        [1.44792000e-06, 9.94186759e-01, 4.67687054e-03, 4.59097960e-09,\n",
       "         8.13127917e-05, 1.05359487e-03],\n",
       "        [1.15803054e-07, 5.27113583e-03, 9.39057171e-01, 4.54971685e-11,\n",
       "         9.01643507e-05, 5.55814430e-02],\n",
       "        [1.46754502e-07, 1.73293091e-02, 9.76962686e-01, 1.31261009e-11,\n",
       "         3.10130781e-05, 5.67688653e-03],\n",
       "        [1.95139123e-08, 1.00000000e+00, 5.52973686e-15, 5.29745969e-10,\n",
       "         1.16119772e-08, 1.17161919e-14],\n",
       "        [9.23415542e-01, 8.84233984e-08, 1.14987811e-11, 7.60771632e-02,\n",
       "         4.57471797e-05, 4.61373420e-04],\n",
       "        [5.52124670e-03, 5.96453901e-04, 1.15553573e-01, 8.64453256e-01,\n",
       "         8.04194156e-03, 5.83340414e-03],\n",
       "        [2.38605469e-10, 1.03292111e-02, 9.85953093e-01, 1.99409282e-12,\n",
       "         5.37918368e-07, 3.71723948e-03],\n",
       "        [6.36091499e-07, 8.57586856e-04, 9.77541387e-01, 1.60946789e-09,\n",
       "         2.87076773e-06, 2.15975456e-02],\n",
       "        [5.75633230e-08, 6.92978268e-04, 9.91717815e-01, 4.65731566e-13,\n",
       "         1.56184914e-08, 7.58910784e-03],\n",
       "        [5.89370984e-06, 9.99951482e-01, 3.93656046e-05, 3.60491237e-09,\n",
       "         1.55891428e-06, 1.76148342e-06],\n",
       "        [7.13159032e-10, 1.10647707e-05, 9.99678493e-01, 1.54388085e-13,\n",
       "         1.80728648e-07, 3.10208648e-04],\n",
       "        [9.87688839e-01, 6.27077668e-07, 5.57847488e-06, 1.23018241e-02,\n",
       "         1.43296586e-06, 1.73018395e-06],\n",
       "        [1.36389886e-07, 2.58755055e-04, 9.99475300e-01, 3.01341032e-08,\n",
       "         1.06526402e-11, 2.65766517e-04],\n",
       "        [3.20602851e-08, 1.18705515e-07, 1.12555654e-09, 1.18864385e-08,\n",
       "         9.99999881e-01, 2.18573355e-08],\n",
       "        [2.04456455e-07, 9.99999404e-01, 7.94796173e-11, 9.66158334e-11,\n",
       "         3.43843880e-07, 6.73675907e-11],\n",
       "        [9.99779761e-01, 4.19613073e-08, 2.64924815e-09, 2.16945293e-04,\n",
       "         8.62656933e-08, 3.19901073e-06],\n",
       "        [3.35609773e-03, 1.04463259e-02, 9.53081965e-01, 7.33436627e-06,\n",
       "         2.49537115e-04, 3.28586400e-02],\n",
       "        [8.19914590e-07, 3.27343890e-03, 9.61677849e-01, 8.63996593e-06,\n",
       "         6.17618673e-04, 3.44217271e-02],\n",
       "        [5.10334841e-09, 1.96719589e-03, 9.82458949e-01, 1.74896827e-12,\n",
       "         1.39643607e-05, 1.55598270e-02],\n",
       "        [6.60903461e-05, 1.43398733e-08, 5.19658061e-05, 2.83931257e-07,\n",
       "         9.97935414e-01, 1.94618467e-03],\n",
       "        [1.10579217e-08, 3.29870953e-09, 5.93151924e-07, 6.91242730e-10,\n",
       "         9.99999166e-01, 2.91217845e-07],\n",
       "        [2.97964016e-06, 2.46094074e-03, 9.79860902e-01, 3.34874281e-11,\n",
       "         1.71378651e-05, 1.76579710e-02],\n",
       "        [3.78346741e-08, 2.71846648e-06, 6.50021029e-05, 4.04130027e-08,\n",
       "         9.99922156e-01, 1.00012649e-05],\n",
       "        [9.95479810e-11, 5.97563994e-05, 9.99373496e-01, 1.45126072e-13,\n",
       "         5.81657815e-08, 5.66692965e-04],\n",
       "        [1.81568911e-08, 1.73160792e-04, 9.99265254e-01, 1.43982409e-10,\n",
       "         2.00096437e-08, 5.61494671e-04]], dtype=float32)]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-evaluate the last test mini-batch left over from the training cell and additionally\n",
    "# fetch the softmax of the logits for that batch.\n",
    "last_batch_feed = {model.X: batch_x[0], model.W: batch_x[1], model.Y: batch_y}\n",
    "class_probabilities = tf.nn.softmax(model.logits)\n",
    "sess.run([model.accuracy, model.cost, class_probabilities], feed_dict = last_batch_feed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'lang-detection-w/model.ckpt'"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Overwrite the checkpoint with the trained weights; the checkpoint path is displayed.\n",
    "saver.save(sess, save_path = 'lang-detection-w/model.ckpt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import boto3\n",
    "\n",
    "# Destination bucket; the upload cells below read this name too.\n",
    "bucketName = 'huseinhouse-storage'\n",
    "\n",
    "# `Key` is the local file to send, `outPutname` is the object key it is stored under.\n",
    "Key = 'lang-detection-w/model.ckpt.data-00000-of-00001'\n",
    "outPutname = 'v34/language-detection/model.ckpt.data-00000-of-00001'\n",
    "\n",
    "s3 = boto3.client('s3')\n",
    "s3.upload_file(Filename = Key, Bucket = bucketName, Key = outPutname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the checkpoint index file (`Key` = local path, `outPutname` = object key).\n",
    "Key = 'lang-detection-w/model.ckpt.index'\n",
    "outPutname = 'v34/language-detection/model.ckpt.index'\n",
    "\n",
    "s3 = boto3.client('s3')\n",
    "s3.upload_file(Filename = Key, Bucket = bucketName, Key = outPutname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the checkpoint meta-graph file (`Key` = local path, `outPutname` = object key).\n",
    "Key = 'lang-detection-w/model.ckpt.meta'\n",
    "outPutname = 'v34/language-detection/model.ckpt.meta'\n",
    "\n",
    "s3 = boto3.client('s3')\n",
    "s3.upload_file(Filename = Key, Bucket = bucketName, Key = outPutname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the pickled `bow` file that was loaded at the top of the notebook.\n",
    "Key = 'bow-language-detection.pkl'\n",
    "outPutname = 'v34/language-detection/bow-language-detection.pkl'\n",
    "\n",
    "s3 = boto3.client('s3')\n",
    "s3.upload_file(Filename = Key, Bucket = bucketName, Key = outPutname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the BPE model file that `yttm.BPE` loaded earlier in the notebook.\n",
    "Key = 'language-detection.model'\n",
    "outPutname = 'v34/language-detection/language-detection.model'\n",
    "\n",
    "s3 = boto3.client('s3')\n",
    "s3.upload_file(Filename = Key, Bucket = bucketName, Key = outPutname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
